library(psych)
library(summarytools)
library(performance)
library(MASS)
library(dplyr)
library(rstatix)
library(lmtest)
library(ggpubr)
library(nnet)
library(tidyr)
library(car)
library(agricolae)
library(ExpDes.pt)
library(hnp)
# Load the leukocyte-profile data set from the remote CSV and preview it.
df <- read.csv(
  file = "https://raw.githubusercontent.com/JessicaHemily/COMP_1/main/LeukocyteProfiles.csv/LeukocyteProfiles.csv",
  header = TRUE,
  sep = ","
)
head(df)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 1 Actitis hypoleucos 5 56 64 19 5
## 2 Actitis hypoleucos 1 44 60 34 0
## 3 Actitis hypoleucos 2 46 63 28 5
## 4 Actitis hypoleucos 3 47 2 87 4
## 5 Actitis hypoleucos 3 44 68 24 6
## 6 Actitis hypoleucos 5 45 42 45 6
## Monocytes Basophils HLRatio
## 1 0 12 3.37
## 2 1 5 1.76
## 3 0 4 2.25
## 4 2 5 0.02
## 5 0 2 2.83
## 6 3 4 0.93
#df_2 <- df[df$Species == "Calidris temminckii",]
#linha <- nrow(df_2)
#linha
VARIÁVEIS
FatScore: Escore de gordura que varia de 0 a 8;
BodyMass: Massa do pássaro;
Heterophils: Heterófilos são fagócitos-chaves fundamentais para a defesa imunológica das aves. Eles se ligam e detectam patógenos invasores através do uso de receptores Toll-like (TLRs), receptores Fc e receptores de complemento;
Lymphocytes: Linfócitos são um tipo de leucócito ou glóbulo branco do sangue, responsáveis pelo reconhecimento e destruição de micro-organismos infecciosos como bactérias e vírus;
Eosinophils: Os eosinófilos são um dos vários glóbulos brancos que sustentam o sistema imunológico e fazem parte do sistema de defesa do seu corpo contra alérgenos além de ajudar a protegê-lo contra infecções fúngicas e parasitárias. Comp: Dependendo da contagem de eosinófilos, a eosinofilia pode ser leve, moderada ou grave. Níveis elevados de eosinófilos podem indicar uma condição leve, como reação a medicamentos ou alergia, ou uma condição grave pode causar isso, incluindo algumas doenças do sangue. Um grande número de eosinófilos se aglomera em áreas específicas do corpo, causando problemas médicos ligados à inflamação que podem afetar várias áreas do corpo.
(https://my.clevelandclinic.org/health/diseases/17710-eosinophilia)
Monocytes: Os monócitos são um tipo de glóbulo branco do sistema imunológico que se transformam em macrófagos ou células dendríticas quando um germe ou bactéria invasora entra em seu corpo. As células matam o invasor ou alertam outras células sanguíneas para ajudar a destruí-lo e prevenir a infecção.
(https://my.clevelandclinic.org/health/body/22110-monocytes)
Basophils: Os basófilos são um tipo de glóbulo branco que trabalha em estreita colaboração com o sistema imunológico para defender o corpo contra alérgenos, patógenos e parasitas, liberando enzimas para melhorar o fluxo sanguíneo e prevenir coágulos sanguíneos.
(https://my.clevelandclinic.org/health/body/23256-basophils)
HLRatio: O HLRatio é geralmente considerado um indicador independente e robusto do nível de estresse em aves. Este parâmetro permite avaliar de forma simples a atividade do sistema imunológico e o estado de saúde individual de aves adultas e filhotes. Também permite avaliar a resposta do organismo ao estresse de curto e longo prazo induzido, entre outros, pelo ambiente envolvente, estresse social, parasitas sanguíneos ou um maior gasto energético das fêmeas durante a reprodução.
# Descriptive statistics for every column of the data set.
summary(df)
## Species FatScore BodyMass Heterophils
## Length:415 Min. :0.000 Min. : 20.00 Min. : 1.00
## Class :character 1st Qu.:0.000 1st Qu.: 45.00 1st Qu.:23.00
## Mode :character Median :3.000 Median : 59.00 Median :35.00
## Mean :2.701 Mean : 76.03 Mean :37.07
## 3rd Qu.:5.000 3rd Qu.: 96.00 3rd Qu.:49.00
## Max. :7.000 Max. :236.00 Max. :89.00
## NA's :3
## Lymphocytes Eosinophils Monocytes Basophils
## Min. : 7.00 Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.:38.00 1st Qu.: 1.000 1st Qu.:0.0000 1st Qu.: 2.000
## Median :53.00 Median : 3.000 Median :0.0000 Median : 4.000
## Mean :51.46 Mean : 6.166 Mean :0.9735 Mean : 4.325
## 3rd Qu.:63.00 3rd Qu.: 9.000 3rd Qu.:2.0000 3rd Qu.: 6.000
## Max. :92.00 Max. :45.000 Max. :8.0000 Max. :19.000
##
## HLRatio
## Min. : 0.010
## 1st Qu.: 0.370
## Median : 0.660
## Mean : 1.052
## 3rd Qu.: 1.195
## Max. :12.140
##
# Compact overview of the column types and their first values.
glimpse(df)
## Rows: 415
## Columns: 9
## $ Species <chr> "Actitis hypoleucos", "Actitis hypoleucos", "Actitis hypol…
## $ FatScore <int> 5, 1, 2, 3, 3, 5, 3, 3, 3, 3, 5, 5, 5, 4, 4, 6, 6, 4, 7, 7…
## $ BodyMass <int> 56, 44, 46, 47, 44, 45, 44, 39, 44, 49, 61, 51, 60, 49, 48…
## $ Heterophils <int> 64, 60, 63, 2, 68, 42, 43, 40, 39, 67, 57, 22, 63, 77, 37,…
## $ Lymphocytes <int> 19, 34, 28, 87, 24, 45, 42, 37, 40, 25, 36, 61, 23, 22, 56…
## $ Eosinophils <int> 5, 0, 5, 4, 6, 6, 6, 5, 18, 2, 5, 6, 4, 1, 1, 4, 18, 6, 18…
## $ Monocytes <int> 0, 1, 0, 2, 0, 3, 0, 1, 1, 3, 0, 1, 0, 0, 0, 3, 2, 2, 3, 1…
## $ Basophils <int> 12, 5, 4, 5, 2, 4, 9, 17, 2, 3, 2, 10, 10, 0, 6, 5, 7, 4, …
## $ HLRatio <dbl> 3.37, 1.76, 2.25, 0.02, 2.83, 0.93, 1.02, 1.08, 0.98, 2.68…
A variável FatScore é do tipo qualitativa ordinal
#df$FatScore <- as.character(df$FatScore)
A variável que se deseja modelar é a variável contínua HLRatio.
# Distribution of the response variable HLRatio.
hist(df$HLRatio)
# Number of observations per species, shown as a one-column matrix.
cbind(table(df$Species))
## [,1]
## Actitis hypoleucos 22
## Arenaria interpres 32
## Calidris alpina 62
## Calidris canutus 10
## Calidris ferruginea 17
## Calidris minuta 23
## Calidris temminckii 5
## Charadrius dubius 28
## Charadrius hiaticula 34
## Limicola falcinellus 11
## Limosa lapponica 9
## Philomachus pugnax 36
## Pluvialis squatarola 5
## Tringa erythropus 20
## Tringa glareola 38
## Tringa nebularia 7
## Tringa ochropus 7
## Tringa totanus 42
## Vanellus vanellus 7
# Count the distinct (non-missing) species and report the total.
n_species <- nlevels(factor(df$Species))
cat("A base de dados apresenta ", n_species, " espécies distintas")
## A base de dados apresenta 19 espécies distintas
# Draw one boxplot per numeric column (columns 2 to 9) on a 2 x 4 grid,
# using the column name as the panel title.
par(mfrow = c(2, 4))
for (coluna in 2:9) {
  variavel <- colnames(df)[coluna]
  boxplot(df[[coluna]], main = variavel)
}
Devido às diferentes espécies em estudo, é esperado que haja variabilidade dos dados.
# Return the first space-delimited word of a string.
# Only the first element of `string` is inspected, so callers that hold a
# vector must apply this function element by element.
extrair_primeira_palavra <- function(string) {
  strsplit(string, " ")[[1]][1]
}
# Derive the genus from the first word of each species name.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# character vector with one entry per row, whatever the input looks like.
df$Genero <- vapply(df$Species, extrair_primeira_palavra, character(1))
# Number of observations per genus, shown as a one-column matrix.
cbind(table(df$Genero))
## [,1]
## Actitis 22
## Arenaria 32
## Calidris 117
## Charadrius 62
## Limicola 11
## Limosa 9
## Philomachus 36
## Pluvialis 5
## Tringa 114
## Vanellus 7
# Count the distinct (non-missing) genera and report the total.
n_genero <- nlevels(factor(df$Genero))
cat("Existem ", n_genero, " Gêneros distintos")
## Existem 10 Gêneros distintos
Fonte: Google.com
Fonte: Google.com
Criação de boxplot por genero
# One boxplot per numeric column (columns 2 to 9), split by genus and
# stacked in a single column of eight panels.
# The grouping column is addressed by name rather than by position, so the
# plots no longer depend on Genero being the 10th column of df.
par(mfrow = c(8, 1))
for (coluna in 2:9) {
  variavel <- names(df)[coluna]
  boxplot(df[, coluna] ~ df[, "Genero"], main = variavel, xlab = "Gênero")
}
Considerando o tamanho da base de dados, considera-se que haja poucos outliers (subjetivamente).
# Keep only the rows that contain at least one missing value and preview them.
df_na <- df[which(!complete.cases(df)), ]
head(df_na)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 170 Calidris temminckii 7 NA 62 34 0
## 288 Philomachus pugnax 3 NA 74 22 2
## 377 Tringa totanus 6 NA 17 62 16
## Monocytes Basophils HLRatio Genero
## 170 0 4 1.82 Calidris
## 288 0 2 3.36 Philomachus
## 377 0 5 0.27 Tringa
# Row counts: incomplete rows versus the whole data set.
linhas_na <- dim(df_na)[1L]
linhas_df <- dim(df)[1L]
linhas_na
## [1] 3
# Report the total number of rows and how many of them contain an NA.
cat("A base de dados possui",linhas_df, "observacoes e",linhas_na, "linhas contendo ao menos um NA")
## A base de dados possui 415 observacoes e 3 linhas contendo ao menos um NA
# Number of missing values in each column, shown as a one-column matrix.
cbind(colSums(is.na(df)))
## [,1]
## Species 0
## FatScore 0
## BodyMass 3
## Heterophils 0
## Lymphocytes 0
## Eosinophils 0
## Monocytes 0
## Basophils 0
## HLRatio 0
## Genero 0
#median_bm <- median(df$BodyMass,by(df$Species_2),na.rm = TRUE)
#median_bm
# Replace the NA entries of a vector with the median of its non-missing
# values and return the completed vector.
# The function itself does no grouping; per-group imputation is obtained by
# calling it once per group.
substituir_na_pela_mediana <- function(valor) {
  replace(valor, is.na(valor), median(valor, na.rm = TRUE))
}
# Disabled earlier attempt, kept for reference: it relied on a precomputed
# median_bm value.
#df$BodyMass <- (replace(x = df$BodyMass,
# list = is.na(df$BodyMass),
# values = median_bm)
# )
# Imputation that takes the genus into account: within each Genero group,
# missing BodyMass values are replaced by that group's median.
df$BodyMass <- ave(df$BodyMass, df$Genero, FUN = substituir_na_pela_mediana)
# One-way ANOVA of HLRatio across genera, followed by its ANOVA table.
fit <- aov(HLRatio ~ Genero,df) # F (Fisher-Snedecor) test
anova(fit)
## Analysis of Variance Table
##
## Response: HLRatio
## Df Sum Sq Mean Sq F value Pr(>F)
## Genero 9 62.6 6.9561 4.8715 3.307e-06 ***
## Residuals 405 578.3 1.4279
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bartlett's test of homogeneity of the HLRatio variances across genera.
bartlett.test(df$HLRatio ~ df$Genero,df)
##
## Bartlett test of homogeneity of variances
##
## data: df$HLRatio by df$Genero
## Bartlett's K-squared = 134.84, df = 9, p-value < 2.2e-16
# Shapiro-Wilk test on the ANOVA residuals. This checks normality only;
# homogeneity of variances is a separate assumption.
shapiro.test(fit$residuals) # normality check of the residuals
##
## Shapiro-Wilk normality test
##
## data: fit$residuals
## W = 0.68613, p-value < 2.2e-16
Por meio da distribuição de Fisher-Snedecor, ao nível de 5% de significância, verificou-se que as médias de HLRatio são diferentes entre os gêneros, ou seja, ao menos duas médias diferem entre si.
# Fisher's LSD comparison of the genus means, printed to the console with
# letter groupings. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
# NOTE(review): the Bartlett and Shapiro-Wilk results printed for this fit
# both reject their null hypotheses, so a rank-based alternative (for
# example kruskal.test) may be more appropriate here — confirm.
LSD.test(fit, "Genero", console = TRUE, group = TRUE)
##
## Study: fit ~ "Genero"
##
## LSD t Test for HLRatio
##
## Mean Square Error: 1.427901
##
## Genero, means and individual ( 95 %) CI
##
## HLRatio std r se LCL UCL Min Max
## Actitis 1.5481818 1.2453289 22 0.2547638 1.0473573 2.0490063 0.02 4.47
## Arenaria 0.6825000 0.5611336 32 0.2112390 0.2672382 1.0977618 0.07 2.83
## Calidris 1.0223077 1.4353345 117 0.1104730 0.8051356 1.2394798 0.10 12.14
## Charadrius 0.4088710 0.3844353 62 0.1517586 0.1105381 0.7072038 0.01 1.58
## Limicola 1.0563636 1.0833861 11 0.3602904 0.3480908 1.7646365 0.03 3.85
## Limosa 0.6966667 0.6251400 9 0.3983160 -0.0863584 1.4796917 0.06 1.96
## Philomachus 1.7847222 1.3763751 36 0.1991580 1.3932097 2.1762348 0.11 5.79
## Pluvialis 0.4520000 0.1703526 5 0.5343970 -0.5985384 1.5025384 0.25 0.69
## Tringa 1.2445614 1.2717183 114 0.1119171 1.0245504 1.4645725 0.06 9.89
## Vanellus 1.3400000 1.9974651 7 0.4516479 0.4521330 2.2278670 0.29 5.79
## Q25 Q50 Q75
## Actitis 0.5675 1.050 2.5725
## Arenaria 0.3525 0.500 0.8600
## Calidris 0.4200 0.650 1.0200
## Charadrius 0.1400 0.355 0.5100
## Limicola 0.3800 0.820 1.2250
## Limosa 0.1600 0.590 1.1100
## Philomachus 0.9000 1.335 2.1375
## Pluvialis 0.3500 0.430 0.5400
## Tringa 0.5000 0.860 1.5600
## Vanellus 0.4100 0.520 0.9800
##
## Alpha: 0.05 ; DF Error: 405
## Critical Value of t: 1.965839
##
## Groups according to probability of means differences and alpha level( 0.05 )
##
## Treatments with the same letter are not significantly different.
##
## HLRatio groups
## Philomachus 1.7847222 a
## Actitis 1.5481818 ab
## Vanellus 1.3400000 abc
## Tringa 1.2445614 bc
## Limicola 1.0563636 bc
## Calidris 1.0223077 bc
## Limosa 0.6966667 bc
## Arenaria 0.6825000 c
## Pluvialis 0.4520000 c
## Charadrius 0.4088710 c
# Scatterplot/correlation matrix of every column except the first (Species).
pairs.panels(df[,-1])
Há multicolinearidade (| r | > 0,8) entre as variáveis:
# Linear model for HLRatio on fat score, body mass, genus and every
# leukocyte count except Lymphocytes; the fitted object is then printed.
# NOTE(review): in the rows printed by head(df), HLRatio matches
# Heterophils / Lymphocytes, so Heterophils partly defines the response —
# confirm that using it as a predictor is intended.
modelo <- lm (HLRatio ~ FatScore + BodyMass + Heterophils + Eosinophils + Monocytes + Basophils + Genero, data = df)
modelo
##
## Call:
## lm(formula = HLRatio ~ FatScore + BodyMass + Heterophils + Eosinophils +
## Monocytes + Basophils + Genero, data = df)
##
## Coefficients:
## (Intercept) FatScore BodyMass Heterophils
## -1.2784847 0.0265131 0.0002793 0.0570374
## Eosinophils Monocytes Basophils GeneroArenaria
## 0.0149864 0.0374214 -0.0007469 -0.0708592
## GeneroCalidris GeneroCharadrius GeneroLimicola GeneroLimosa
## 0.0107790 0.1870199 -0.0729608 -0.0292204
## GeneroPhilomachus GeneroPluvialis GeneroTringa GeneroVanellus
## -0.1472890 0.0363227 -0.0631685 0.2435220
# Model diagnostic plots on a 2 x 2 grid, then the coefficient table.
par(mfrow = c(2,2))
plot(modelo)
summary(modelo)
##
## Call:
## lm(formula = HLRatio ~ FatScore + BodyMass + Heterophils + Eosinophils +
## Monocytes + Basophils + Genero, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8289 -0.3651 -0.1409 0.2112 8.3661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.2784847 0.2410434 -5.304 1.88e-07 ***
## FatScore 0.0265131 0.0180987 1.465 0.1437
## BodyMass 0.0002793 0.0015530 0.180 0.8574
## Heterophils 0.0570374 0.0021548 26.470 < 2e-16 ***
## Eosinophils 0.0149864 0.0059477 2.520 0.0121 *
## Monocytes 0.0374214 0.0277823 1.347 0.1788
## Basophils -0.0007469 0.0113033 -0.066 0.9473
## GeneroArenaria -0.0708592 0.2122515 -0.334 0.7387
## GeneroCalidris 0.0107790 0.1684497 0.064 0.9490
## GeneroCharadrius 0.1870199 0.1960234 0.954 0.3406
## GeneroLimicola -0.0729608 0.2680936 -0.272 0.7857
## GeneroLimosa -0.0292204 0.3664346 -0.080 0.9365
## GeneroPhilomachus -0.1472890 0.2218102 -0.664 0.5071
## GeneroPluvialis 0.0363227 0.4033447 0.090 0.9283
## GeneroTringa -0.0631685 0.1816493 -0.348 0.7282
## GeneroVanellus 0.2435220 0.3972058 0.613 0.5402
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7033 on 399 degrees of freedom
## Multiple R-squared: 0.6921, Adjusted R-squared: 0.6805
## F-statistic: 59.78 on 15 and 399 DF, p-value: < 2.2e-16
# Residual histogram next to a half-normal plot with simulated envelope.
par(mfrow = c(1,2))
hist(modelo$residuals)
hnp::hnp(modelo$residuals)
## Half-normal plot with simulated envelope generated assuming the residuals are
## normally distributed under the null hypothesis.
# Same model with Lymphocytes in place of Heterophils; this overwrites the
# previous `modelo` object, which is then printed.
modelo <- lm(HLRatio ~ FatScore + Lymphocytes + BodyMass + Eosinophils + Monocytes + Basophils + Genero, data = df)
modelo
##
## Call:
## lm(formula = HLRatio ~ FatScore + Lymphocytes + BodyMass + Eosinophils +
## Monocytes + Basophils + Genero, data = df)
##
## Coefficients:
## (Intercept) FatScore Lymphocytes BodyMass
## 4.4252596 0.0265131 -0.0570374 0.0002793
## Eosinophils Monocytes Basophils GeneroArenaria
## -0.0420511 -0.0196161 -0.0577844 -0.0708592
## GeneroCalidris GeneroCharadrius GeneroLimicola GeneroLimosa
## 0.0107790 0.1870199 -0.0729608 -0.0292204
## GeneroPhilomachus GeneroPluvialis GeneroTringa GeneroVanellus
## -0.1472890 0.0363227 -0.0631685 0.2435220
# Model diagnostic plots on a 2 x 2 grid, then the coefficient table.
par(mfrow = c(2,2))
plot(modelo)
summary(modelo)
##
## Call:
## lm(formula = HLRatio ~ FatScore + Lymphocytes + BodyMass + Eosinophils +
## Monocytes + Basophils + Genero, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8289 -0.3651 -0.1409 0.2112 8.3661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4252596 0.2218088 19.951 < 2e-16 ***
## FatScore 0.0265131 0.0180987 1.465 0.144
## Lymphocytes -0.0570374 0.0021548 -26.470 < 2e-16 ***
## BodyMass 0.0002793 0.0015530 0.180 0.857
## Eosinophils -0.0420511 0.0055996 -7.510 3.92e-13 ***
## Monocytes -0.0196161 0.0274871 -0.714 0.476
## Basophils -0.0577844 0.0111706 -5.173 3.66e-07 ***
## GeneroArenaria -0.0708592 0.2122515 -0.334 0.739
## GeneroCalidris 0.0107790 0.1684497 0.064 0.949
## GeneroCharadrius 0.1870199 0.1960234 0.954 0.341
## GeneroLimicola -0.0729608 0.2680936 -0.272 0.786
## GeneroLimosa -0.0292204 0.3664346 -0.080 0.936
## GeneroPhilomachus -0.1472890 0.2218102 -0.664 0.507
## GeneroPluvialis 0.0363227 0.4033447 0.090 0.928
## GeneroTringa -0.0631685 0.1816493 -0.348 0.728
## GeneroVanellus 0.2435220 0.3972058 0.613 0.540
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7033 on 399 degrees of freedom
## Multiple R-squared: 0.6921, Adjusted R-squared: 0.6805
## F-statistic: 59.78 on 15 and 399 DF, p-value: < 2.2e-16
# Residual histogram next to a half-normal plot with simulated envelope.
par(mfrow = c(1,2))
hist(modelo$residuals)
hnp::hnp(modelo$residuals)
## Half-normal plot with simulated envelope generated assuming the residuals are
## normally distributed under the null hypothesis.
# AIC-based stepwise selection starting from the current `modelo`.
# The selected model is only printed here; it is not stored in a variable.
step(modelo)
## Start: AIC=-276.44
## HLRatio ~ FatScore + Lymphocytes + BodyMass + Eosinophils + Monocytes +
## Basophils + Genero
##
## Df Sum of Sq RSS AIC
## - Genero 9 2.78 200.15 -288.63
## - BodyMass 1 0.02 197.38 -278.40
## - Monocytes 1 0.25 197.62 -277.91
## <none> 197.37 -276.44
## - FatScore 1 1.06 198.43 -276.21
## - Basophils 1 13.24 210.60 -251.50
## - Eosinophils 1 27.90 225.26 -223.57
## - Lymphocytes 1 346.59 543.96 142.29
##
## Step: AIC=-288.63
## HLRatio ~ FatScore + Lymphocytes + BodyMass + Eosinophils + Monocytes +
## Basophils
##
## Df Sum of Sq RSS AIC
## - BodyMass 1 0.13 200.28 -290.35
## - Monocytes 1 0.28 200.43 -290.05
## - FatScore 1 0.66 200.81 -289.26
## <none> 200.15 -288.63
## - Basophils 1 15.06 215.20 -260.53
## - Eosinophils 1 34.33 234.48 -224.93
## - Lymphocytes 1 380.36 580.51 151.28
##
## Step: AIC=-290.35
## HLRatio ~ FatScore + Lymphocytes + Eosinophils + Monocytes +
## Basophils
##
## Df Sum of Sq RSS AIC
## - Monocytes 1 0.28 200.56 -291.77
## - FatScore 1 0.82 201.10 -290.65
## <none> 200.28 -290.35
## - Basophils 1 14.94 215.22 -262.50
## - Eosinophils 1 34.23 234.51 -226.88
## - Lymphocytes 1 383.52 583.80 151.63
##
## Step: AIC=-291.77
## HLRatio ~ FatScore + Lymphocytes + Eosinophils + Basophils
##
## Df Sum of Sq RSS AIC
## - FatScore 1 0.95 201.51 -291.81
## <none> 200.56 -291.77
## - Basophils 1 14.76 215.32 -264.31
## - Eosinophils 1 34.17 234.73 -228.48
## - Lymphocytes 1 392.31 592.87 156.03
##
## Step: AIC=-291.81
## HLRatio ~ Lymphocytes + Eosinophils + Basophils
##
## Df Sum of Sq RSS AIC
## <none> 201.51 -291.81
## - Basophils 1 14.12 215.63 -265.70
## - Eosinophils 1 35.76 237.27 -226.02
## - Lymphocytes 1 394.61 596.12 156.30
##
## Call:
## lm(formula = HLRatio ~ Lymphocytes + Eosinophils + Basophils,
## data = df)
##
## Coefficients:
## (Intercept) Lymphocytes Eosinophils Basophils
## 4.41657 -0.05608 -0.03806 -0.05641
#hist(modelo$residuals)
#summary(modelo)
#anova(modelo)
#AIC(modelo)
Sugerido pelo STEP(modelo)
# Refit the formula reported by step(): Lymphocytes, Eosinophils, Basophils.
modelo <- lm(HLRatio ~ Lymphocytes + Eosinophils + Basophils, data = df)
# Model diagnostic plots on a 2 x 2 grid.
par(mfrow=c(2,2))
plot(modelo)
# Residual histogram next to a half-normal plot with simulated envelope.
par(mfrow = c(1,2))
hist(modelo$residuals)
hnp::hnp(modelo$residuals)
## Half-normal plot with simulated envelope generated assuming the residuals are
## normally distributed under the null hypothesis.
Como esperado, será necessário dividir a base em subsets por gênero, para realizar a análise de regressão.
# Number of observations available for each genus.
table(df$Genero)
##
## Actitis Arenaria Calidris Charadrius Limicola Limosa
## 22 32 117 62 11 9
## Philomachus Pluvialis Tringa Vanellus
## 36 5 114 7
# Rows belonging to the genus Actitis, followed by a preview.
df_actitis <- subset(df, Genero == "Actitis")
head(df_actitis)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 1 Actitis hypoleucos 5 56 64 19 5
## 2 Actitis hypoleucos 1 44 60 34 0
## 3 Actitis hypoleucos 2 46 63 28 5
## 4 Actitis hypoleucos 3 47 2 87 4
## 5 Actitis hypoleucos 3 44 68 24 6
## 6 Actitis hypoleucos 5 45 42 45 6
## Monocytes Basophils HLRatio Genero
## 1 0 12 3.37 Actitis
## 2 1 5 1.76 Actitis
## 3 0 4 2.25 Actitis
## 4 2 5 0.02 Actitis
## 5 0 2 2.83 Actitis
## 6 3 4 0.93 Actitis
# Rows belonging to the genus Arenaria, followed by a preview.
df_arenaria <- subset(df, Genero == "Arenaria")
head(df_arenaria)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 23 Arenaria interpres 5 95 22 61 0
## 24 Arenaria interpres 5 94 50 45 2
## 25 Arenaria interpres 4 91 30 62 1
## 26 Arenaria interpres 0 86 24 73 0
## 27 Arenaria interpres 0 81 26 63 3
## 28 Arenaria interpres 0 73 42 53 0
## Monocytes Basophils HLRatio Genero
## 23 0 17 0.36 Arenaria
## 24 1 2 1.11 Arenaria
## 25 1 6 0.48 Arenaria
## 26 3 0 0.33 Arenaria
## 27 3 5 0.41 Arenaria
## 28 3 2 0.79 Arenaria
# Rows belonging to the genus Calidris, followed by a preview.
df_calidris <- subset(df, Genero == "Calidris")
head(df_calidris)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 55 Calidris alpina 0 39 47 31 1
## 56 Calidris alpina 0 42 44 44 1
## 57 Calidris alpina 3 50 16 76 1
## 58 Calidris alpina 1 47 47 46 3
## 59 Calidris alpina 6 54 23 68 0
## 60 Calidris alpina 3 43 85 7 2
## Monocytes Basophils HLRatio Genero
## 55 2 19 1.52 Calidris
## 56 0 11 1.00 Calidris
## 57 0 7 0.21 Calidris
## 58 0 4 1.02 Calidris
## 59 0 9 0.34 Calidris
## 60 2 4 12.14 Calidris
# Rows belonging to the genus Charadrius, followed by a preview.
df_charadrius <- subset(df, Genero == "Charadrius")
head(df_charadrius)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 172 Charadrius dubius 0 31 41 45 11
## 173 Charadrius dubius 0 29 55 36 4
## 174 Charadrius dubius 4 35 25 57 14
## 175 Charadrius dubius 1 33 55 37 0
## 176 Charadrius dubius 5 35 22 56 18
## 177 Charadrius dubius 5 45 15 60 22
## Monocytes Basophils HLRatio Genero
## 172 1 2 0.91 Charadrius
## 173 1 4 1.53 Charadrius
## 174 0 4 0.44 Charadrius
## 175 0 8 1.49 Charadrius
## 176 1 3 0.39 Charadrius
## 177 0 3 0.25 Charadrius
# Rows belonging to the genus Limicola, followed by a preview.
df_limicola <- subset(df, Genero == "Limicola")
head(df_limicola)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 234 Limicola falcinellus 2 30 30 60 3
## 235 Limicola falcinellus 1 30 56 38 6
## 236 Limicola falcinellus 1 25 64 33 1
## 237 Limicola falcinellus 5 32 40 53 2
## 238 Limicola falcinellus 4 34 19 74 5
## 239 Limicola falcinellus 5 30 77 20 1
## Monocytes Basophils HLRatio Genero
## 234 3 4 0.50 Limicola
## 235 0 0 1.47 Limicola
## 236 1 1 1.94 Limicola
## 237 2 3 0.75 Limicola
## 238 2 0 0.26 Limicola
## 239 1 1 3.85 Limicola
# Rows belonging to the genus Limosa, followed by a preview.
df_limosa <- subset(df, Genero == "Limosa")
head(df_limosa)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 245 Limosa lapponica 0 225 35 55 7
## 246 Limosa lapponica 2 177 30 59 5
## 247 Limosa lapponica 3 215 13 83 3
## 248 Limosa lapponica 0 192 55 28 11
## 249 Limosa lapponica 0 220 5 79 7
## 250 Limosa lapponica 0 162 49 42 1
## Monocytes Basophils HLRatio Genero
## 245 1 2 0.64 Limosa
## 246 1 5 0.51 Limosa
## 247 1 0 0.16 Limosa
## 248 6 0 1.96 Limosa
## 249 8 1 0.06 Limosa
## 250 6 2 1.17 Limosa
# Rows belonging to the genus Philomachus, followed by a preview.
df_philomachus <- subset(df, Genero == "Philomachus")
head(df_philomachus)
## Species FatScore BodyMass Heterophils Lymphocytes Eosinophils
## 254 Philomachus pugnax 3 157 39 52 1
## 255 Philomachus pugnax 2 146 25 66 5
## 256 Philomachus pugnax 3 166 33 61 1
## 257 Philomachus pugnax 5 157 28 69 1
## 258 Philomachus pugnax 5 146 47 41 4
## 259 Philomachus pugnax 0 145 46 50 2
## Monocytes Basophils HLRatio Genero
## 254 0 8 0.75 Philomachus
## 255 0 4 0.38 Philomachus
## 256 0 5 0.54 Philomachus
## 257 0 2 0.41 Philomachus
## 258 3 5 1.15 Philomachus
## 259 0 2 0.92 Philomachus
# Actitis only: linear model for HLRatio on fat score, body mass and every
# leukocyte count except Lymphocytes; the fitted object is then printed.
modelo_a1 <- lm(HLRatio ~ FatScore + BodyMass + Heterophils + Eosinophils + Monocytes + Basophils , data = df_actitis)
modelo_a1
##
## Call:
## lm(formula = HLRatio ~ FatScore + BodyMass + Heterophils + Eosinophils +
## Monocytes + Basophils, data = df_actitis)
##
## Coefficients:
## (Intercept) FatScore BodyMass Heterophils Eosinophils Monocytes
## -0.754499 0.063614 -0.005430 0.053124 -0.010397 -0.040394
## Basophils
## 0.003258
# Coefficient table and fit statistics for the full Actitis model.
summary(modelo_a1)
##
## Call:
## lm(formula = HLRatio ~ FatScore + BodyMass + Heterophils + Eosinophils +
## Monocytes + Basophils, data = df_actitis)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63499 -0.40193 -0.04674 0.17199 1.16778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.754499 1.091703 -0.691 0.500
## FatScore 0.063614 0.132094 0.482 0.637
## BodyMass -0.005430 0.026522 -0.205 0.841
## Heterophils 0.053124 0.007766 6.840 5.6e-06 ***
## Eosinophils -0.010397 0.026374 -0.394 0.699
## Monocytes -0.040394 0.128129 -0.315 0.757
## Basophils 0.003258 0.032523 0.100 0.922
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.555 on 15 degrees of freedom
## Multiple R-squared: 0.8581, Adjusted R-squared: 0.8014
## F-statistic: 15.12 on 6 and 15 DF, p-value: 1.346e-05
# Reduced Actitis model with Heterophils as the only predictor; this
# overwrites the previous modelo_a1 object.
modelo_a1 <- lm(HLRatio ~ Heterophils , data = df_actitis)
summary(modelo_a1)
##
## Call:
## lm(formula = HLRatio ~ Heterophils, data = df_actitis)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61703 -0.28991 -0.09782 0.19615 1.24173
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.89671 0.24935 -3.596 0.0018 **
## Heterophils 0.05428 0.00503 10.791 8.66e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4885 on 20 degrees of freedom
## Multiple R-squared: 0.8534, Adjusted R-squared: 0.8461
## F-statistic: 116.5 on 1 and 20 DF, p-value: 8.66e-10
# Model diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2,2))
plot(modelo_a1)
# Residual histogram next to a half-normal plot with simulated envelope.
par(mfrow = c(1,2))
hist(modelo_a1$residuals)
hnp::hnp(modelo_a1$residuals)
## Half-normal plot with simulated envelope generated assuming the residuals are
## normally distributed under the null hypothesis.
# Alternative Actitis model without Heterophils and Lymphocytes.
# No family argument is given, so glm() falls back to its default family
# (gaussian).
modelo_a2 <- glm(HLRatio~FatScore + BodyMass + Eosinophils + Monocytes + Basophils, data = df_actitis)
modelo_a2
##
## Call: glm(formula = HLRatio ~ FatScore + BodyMass + Eosinophils + Monocytes +
## Basophils, data = df_actitis)
##
## Coefficients:
## (Intercept) FatScore BodyMass Eosinophils Monocytes Basophils
## 1.50678 -0.03475 0.03102 -0.08970 -0.45515 -0.05011
##
## Degrees of Freedom: 21 Total (i.e. Null); 16 Residual
## Null Deviance: 32.57
## Residual Deviance: 19.03 AIC: 73.24
As variáveis “Lymphocytes” e “Heterophils” não foram incluídas no modelo devido à menor relação com a variável resposta.
# Model diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2,2))
plot(modelo_a2)
# Residual histogram next to a half-normal plot with simulated envelope.
par(mfrow = c(1,2))
hist(modelo_a2$residuals)
hnp::hnp(modelo_a2$residuals)
## Half-normal plot with simulated envelope generated assuming the residuals are
## normally distributed under the null hypothesis.
# Compare the two Actitis models by AIC (lower values indicate a better fit).
AIC(modelo_a1,modelo_a2)
## df AIC
## modelo_a1 3 34.81775
## modelo_a2 7 73.24464
# Limosa only: model for HLRatio including Heterophils but not Lymphocytes.
# No family argument is given, so glm() falls back to its default family
# (gaussian).
# NOTE(review): the printed fit shows 8 total and only 2 residual degrees
# of freedom, so this model is close to saturated — interpret with care.
modelo_limosa_1 <- glm(HLRatio ~ FatScore + BodyMass + Heterophils + Eosinophils + Monocytes + Basophils , data = df_limosa)
modelo_limosa_1
##
## Call: glm(formula = HLRatio ~ FatScore + BodyMass + Heterophils + Eosinophils +
## Monocytes + Basophils, data = df_limosa)
##
## Coefficients:
## (Intercept) FatScore BodyMass Heterophils Eosinophils Monocytes
## -0.5698636 0.1180102 -0.0002396 0.0343418 0.0235046 0.0539964
## Basophils
## -0.0728783
##
## Degrees of Freedom: 8 Total (i.e. Null); 2 Residual
## Null Deviance: 3.126
## Residual Deviance: 0.08466 AIC: -0.4564
A variável “Lymphocytes” não foi incluída no modelo devido à menor relação com a variável resposta.
# Model diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2,2))
plot(modelo_limosa_1)
# Residual histogram next to a half-normal plot with simulated envelope.
par(mfrow = c(1,2))
hist(modelo_limosa_1$residuals)
hnp::hnp(modelo_limosa_1$residuals)
## Half-normal plot with simulated envelope generated assuming the residuals are
## normally distributed under the null hypothesis.
# Alternative Limosa model without Heterophils and Lymphocytes.
# No family argument is given, so glm() falls back to its default family
# (gaussian).
modelo_limosa_2 <- glm(HLRatio ~ FatScore + BodyMass + Eosinophils + Monocytes + Basophils, data = df_limosa)
modelo_limosa_2
##
## Call: glm(formula = HLRatio ~ FatScore + BodyMass + Eosinophils + Monocytes +
## Basophils, data = df_limosa)
##
## Coefficients:
## (Intercept) FatScore BodyMass Eosinophils Monocytes Basophils
## 2.86402 -0.13569 -0.01220 0.04194 0.03799 -0.03557
##
## Degrees of Freedom: 8 Total (i.e. Null); 3 Residual
## Null Deviance: 3.126
## Residual Deviance: 1.66 AIC: 24.33
As variáveis “Lymphocytes” e “Heterophils” não foram incluídas no modelo devido à menor relação com a variável resposta.
# Model diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2,2))
plot(modelo_limosa_2)
# Residual histogram next to a half-normal plot with simulated envelope.
par(mfrow = c(1,2))
hist(modelo_limosa_2$residuals)
hnp::hnp(modelo_limosa_2$residuals)
## Half-normal plot with simulated envelope generated assuming the residuals are
## normally distributed under the null hypothesis.
# Compare the two Limosa models by AIC (lower values indicate a better fit).
AIC(modelo_limosa_1,modelo_limosa_2)
## df AIC
## modelo_limosa_1 8 -0.4563778
## modelo_limosa_2 7 24.3279476